Loading libraries

library(dplyr)
library(ggplot2)
library(plotly)
library(tidyr)

Reading data

# Read the first 10,000 rows of the ligand summary table.
# The file is semicolon-separated (read.csv2's default `sep`), but its numbers
# use "." as the decimal mark. read.csv2 defaults to dec = ",", which leaves
# columns such as blob_volume_coverage unparsed, so summary() reports per-value
# counts for them instead of numeric quantiles. Setting dec = "." fixes that.
# NOTE(review): the default na.strings = "NA" also turns a literal "NA" in
# res_name into a missing value (summary() reports NA's there) - confirm
# whether a residue actually named "NA" should be kept.
data <- read.csv2("./all_summary.csv", dec = ".", nrows = 10000)
dim(data)
## [1] 10000   412

Deleting chosen ligands

# Residue names to drop from the analysis. Rows whose res_name equals any of
# these entries are removed; rows with a missing res_name are kept.
deletable_res_name <- c(
  "UNK", "UNX", "UNL", "DUM", "N", "BLOB",
  "ALA", "ARG", "ASN", "ASP", "CYS", "GLN", "GLU", "GLY", "HIS", "ILE",
  "LEU", "LYS", "MET", "MSE", "PHE", "PRO", "SEC", "SER", "THR", "TRP",
  "TYR", "VAL",
  "DA", "DG", "DT", "DC", "DU", "A", "G", "T", "C", "U",
  "HOH", "H20", "WAT"
)
# match() yields NA for names that are not on the list, i.e. the rows to keep.
data <- data[is.na(match(data$res_name, deletable_res_name)), ]
dim(data)
## [1] 9940  412

Processing missing data

#data <- data[complete.cases(data), ]
#dim(data)

Data summary

# Keep only the four columns of interest and render their summary() as a table.
statistics <- select(
  data,
  res_name,
  blob_volume_coverage,
  blob_volume_coverage_second,
  skeleton_density
)
knitr::kable(summary(statistics))
| res_name     | blob_volume_coverage | blob_volume_coverage_second | skeleton_density   |
|:-------------|:---------------------|:----------------------------|:-------------------|
| SO4 :1007    | 1 : 138              | 0 :8384                     | 0 :1144            |
| GOL : 632    | 0.8461538462: 6      | 0.0243902439 : 2            | 1 : 993            |
| EDO : 516    | 0.8571428571: 6      | 0.02523659306: 2            | 0.6666666667: 530  |
| NAG : 464    | 0.75 : 5             | 0.0200661832 : 1            | 0.5 : 289          |
| CL : 387     | 0.8333333333: 5      | 0.02009536785: 1            | 0.1666666667: 233  |
| (Other):6770 | 0.3266490765: 4      | 0.02016883762: 1            | 0.1538461538: 229  |
| NA's : 164   | (Other) :9776        | (Other) :1549               | (Other) :6522      |
# Print the dimensions (rows, columns) of `data` again after the summary step.
dim(data)
## [1] 9940  412

Cardinality of ligands by name

# Interactive bar chart of how many rows each ligand name accounts for, with
# bars ordered from the most to the least frequent name.
#
# `popular_names` must provide the columns `res_name` and `cardinality`. It is
# not defined anywhere above in this script, so it is built here when missing;
# an already existing `popular_names` object is left untouched.
# NOTE(review): the fallback keeps the 50 most frequent names - that cut-off
# is an assumption, confirm the intended number.
if (!exists("popular_names")) {
  popular_names <- data %>%
    filter(!is.na(res_name)) %>%
    group_by(res_name) %>%
    summarise(cardinality = n()) %>%
    ungroup() %>%
    arrange(desc(cardinality)) %>%
    head(50)
}

plot <- ggplot(
  popular_names,
  aes(
    x = reorder(res_name, -cardinality),
    y = cardinality,
    fill = cardinality
  )
) +
  geom_bar(stat = "identity") +
  # Rotate the ligand names so they do not overlap on the x axis.
  theme(axis.text.x = element_text(angle = 90)) +
  xlab("ligand") +
  labs(title = "Cardinality of ligands by name")

ggplotly(plot)

Distribution of atom and electron count

# Interactive density plot of the local_res_atom_non_h_count column.
plot_atom <- ggplot(
  data = data,
  mapping = aes(x = local_res_atom_non_h_count)
) +
  # Filled area only: the outline is switched off with color = NA.
  geom_density(fill = "#00CECB", color = NA, alpha = 0.3) +
  labs(x = "atom count", title = "Atom count distribution")

ggplotly(plot_atom)
# Interactive density plot of the local_res_atom_non_h_electron_sum column.
plot_electron <- ggplot(
  data = data,
  mapping = aes(x = local_res_atom_non_h_electron_sum)
) +
  # Filled area only: the outline is switched off with color = NA.
  geom_density(fill = "#FF5E5B", color = NA, alpha = 0.3) +
  labs(x = "electron count", title = "Electron count distribution")

ggplotly(plot_electron)